library(ggplot2)
library(ggExtra)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(plyr)
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:plotly':
## 
##     arrange, mutate, rename, summarise
# Load the Brazilian CAPES (Qualis) journal evaluations.
# read.csv2() is called with sep = "," so fields are comma-separated while
# its default decimal mark (",") is left untouched.
qualis_filename <- "../data/Brazil_CAPES_evaluations.csv"
qualis_data <- read.csv2(file = qualis_filename, sep = ",")
colnames(qualis_data) <- c("Issn", "Title", "field", "BR_score")

# Strip hyphens so that ISSNs look like "12345678" instead of "1234-5678".
qualis_data$Issn <- gsub("-", "", qualis_data$Issn)

# Convert A1 to 8 and C to 1 (A1 is the best score).
# The mapping uses an explicit, ordered list of labels instead of
# as.integer() on the raw column: as.integer() only produced 1..8 when the
# column was read as a factor (the pre-R-4.0 default) and every label was
# present in the file; on a character column it returns NA for all rows.
# Labels outside this list become NA.
# NOTE(review): level list assumes the standard Qualis scale - confirm it
# matches the labels actually present in the CSV.
qualis_levels <- c("A1", "A2", "B1", "B2", "B3", "B4", "B5", "C")
qualis_labels <- trimws(as.character(qualis_data$BR_score))
qualis_data$BR_score <- 9 - match(qualis_labels, qualis_levels)

# ~~~

# Load the international SJR (Scimago) journal ranking; read.csv2() expects
# ";" as field separator and "," as decimal mark.
international_filename <- "../data/Scimagojr2016.csv"
inter_data <- read.csv2(file = international_filename)

# Force the two metrics to numeric. The values go through as.character()
# first because as.numeric() applied directly to a factor returns the
# internal level codes rather than the numbers shown in the file; for columns
# that were already parsed as numbers this is a no-op.
inter_data$SJR <- as.numeric(as.character(inter_data$SJR))
inter_data$H.index <- as.numeric(as.character(inter_data$H.index))

# ~~~

# Merge the two data frames on ISSN: Qualis data from Brazil and SJR
# international data. merge() keeps only rows whose Issn appears in both.
# NOTE(review): the original author observed that only 153 journals were
# retrieved - worth checking that both Issn columns use the same format.
data <- merge(x = qualis_data, y = inter_data, by = "Issn")
data$BR_score <- as.integer(data$BR_score)
# Convert through as.character() so that a factor column yields the numbers
# printed in the file instead of its internal level codes; a column that is
# already numeric is unaffected.
data$Total.Refs. <- as.integer(as.character(data$Total.Refs.))

# ~~~

# Tally how many journals fall on each score level after the merge
# ("Post-Filtered") and before it ("Original"), stack both tallies into one
# long table, and draw them as side-by-side bars.
# count() yields one row per distinct score with the columns `x` and `freq`
# that the plot aesthetics below rely on (presumably plyr::count, the last
# package attached - confirm).
aux <- count(data$BR_score)
aux[["type"]] <- 'Post-Filtered'

data_to_show <- count(qualis_data$BR_score)
data_to_show[["type"]] <- 'Original'
data_to_show <- rbind(data_to_show, aux)

ggplotly(
  ggplot(data_to_show, aes(x = as.factor(x), y = freq, fill = type)) +
    geom_bar(stat = "identity", position = position_dodge2(), alpha = .9) +
    labs(x = "Level", y = "Quantity", title = "Journals score distribuition") +
    scale_fill_discrete(name = "Category") +
    theme_light()
)

# TODO: take the A1 event with the lowest impact factor and compare it with the impact factor of an A2/B1/etc. one.